package com.esd.core;

import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import javax.annotation.Resource;

import org.apache.log4j.Logger;
import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.springframework.data.mongodb.core.query.Criteria;
import org.springframework.data.mongodb.core.query.Query;
import org.springframework.stereotype.Component;

import com.esd.collection.DbFile;
import com.esd.collection.Downloads;
import com.esd.collection.Site;
import com.esd.collection.Urls;
import com.esd.common.CatDao;
import com.esd.common.MongoDBUtil;
import com.esd.config.BaseConfig;
import com.esd.config.PageConfig;
import com.esd.controller.site.SiteController;
import com.esd.dao.MongoDBDao;
import com.esd.download.EsdDownLoadHtml;
import com.esd.util.Md5;
import com.esd.util.SpringContextUtil;
import com.esd.util.Util;

/**
 * Crawls the pages of one site on a background thread.
 *
 * <p>
 * URLs are taken one at a time from the download queue exposed by
 * {@link MongoDBUtil}; each page is downloaded, the links found in it are
 * queued for later download, and the page is handed to {@link CatDao} for
 * processing.
 *
 * <p>
 * The two control flags are {@code volatile} because they are written by
 * callers of {@link #init(String, String)} / {@link #setCollectStatic(boolean)}
 * and read by the worker thread.
 */
@Component
public class CollectionPage {

	private static final Logger logger = Logger.getLogger(CollectionPage.class);

	/** Endpoint shared by most of the rewritten {@code javascript:} links. */
	private static final String PROCEEDING_CTRL_URL = "http://spjc.bjmtg.gov.cn/hrapp/weboffice/proceedingCtrl";

	/**
	 * Quote characters that may delimit the arguments of a {@code javascript:}
	 * call. They are tried in this order and a later match overrides an
	 * earlier one.
	 */
	private static final String[] ARGUMENT_QUOTES = { "'", "\"" };

	@Resource
	private MongoDBUtil mongoDBUtil;
	@Resource
	private CatDao dao;

	/** External stop switch; cleared through {@link #setCollectStatic(boolean)}. */
	private volatile boolean collectStatic = true;
	/** Result of the last {@link #collect(String, String[])} call; false ends the loop. */
	private volatile boolean ctrl = true;

	/** The most recently started worker thread, or null before the first start. */
	private Thread thread;

	/**
	 * Starts a new worker thread for the site currently selected in
	 * {@code SiteController.siteId}.
	 */
	public void start() {
		thread = new A(SiteController.siteId);
		thread.setPriority(Thread.MAX_PRIORITY);
		thread.start();
	}

	/**
	 * Worker thread: looks up the site, then calls
	 * {@link CollectionPage#collect(String, String[])} until it returns false
	 * or collection is switched off.
	 */
	private class A extends Thread {
		private final String siteId;

		public A(String siteId) {
			this.siteId = siteId;
		}

		@Override
		public void run() {
			SiteController.siteId = siteId;

			Criteria criatira = new Criteria();
			criatira.andOperator(Criteria.where("id").is(siteId));
			Site site = mongoDBUtil.findOneByCollectionName("sites", criatira, Site.class);
			if (site == null || site.getDomainName() == null) {
				// Without a site record there is nothing to crawl; fail with a
				// clear message instead of a NullPointerException.
				logger.error("site or its domain name not found, siteId: " + siteId);
				return;
			}
			String[] domain = site.getDomainName().split(",");

			while (collectStatic && ctrl) {
				ctrl = collect(siteId, domain);
			}
			destroySource();// release resources
			logger.info("采集线程结束！！！！！！！！！！！！！！！！！");
		}

	}

	public CollectionPage() {

	}

	/**
	 * Resets the control flags, calls {@code mongoDBUtil.dropTable()}, queues
	 * the start URLs and loads the page configuration of the site.
	 *
	 * @param domain
	 *            comma-separated list of URLs queued as home pages
	 * @param siteId
	 *            id passed to {@code dao.collectPageConfig}
	 */
	public void init(String domain, String siteId) {
		collectStatic = true;
		ctrl = true;
		mongoDBUtil.dropTable();

		for (String homePage : domain.split(",")) {
			mongoDBUtil.downloadsInsert(homePage);// queue the home page
		}
		for (int i = 0; i < BaseConfig.str.length; i++) {
			mongoDBUtil.downloadsInsert(BaseConfig.str[i]);
		}
		dao.collectPageConfig(siteId);
	}

	/**
	 * Processes one URL from the download queue.
	 *
	 * <p>
	 * An external URL (per {@code Util.isOutUrl}) gets a placeholder page and a
	 * warning record. Otherwise the page is downloaded, every link found in it
	 * is queued, the page is processed through {@code dao.singlCat} when a page
	 * configuration exists, and the URL is recorded with state "1" (processed)
	 * or "0" (not processed). A page that cannot be downloaded is skipped
	 * without being recorded.
	 *
	 * @param siteId
	 *            id of the site being crawled
	 * @param domain
	 *            domain names of the site
	 * @return false when the queue yields no entry or an entry without a URL
	 *         (the worker loop then stops), true otherwise
	 */
	public boolean collect(String siteId, String[] domain) {
		long startMillis = System.currentTimeMillis();

		Downloads bson = mongoDBUtil.downloadsFindAndDeleteOne();
		String url = bson == null ? null : bson.getUrl();
		if (url == null) {
			// NOTE(review): presumably the end-of-crawl call — confirm in CatDao.
			dao.singlCat(null, null, null, false, domain);
			return false;
		}

		if (Util.isOutUrl(url, siteId, domain)) {
			saveExternalLinkPage(url);
			return true;
		}

		PageConfig pageConfig = dao.findPageConfig(url);
		Document htmlSource = downloadSource(url, pageConfig, domain);
		if (htmlSource == null) {
			return true;
		}

		// The title is read before the links are rewritten, as before.
		String title = docisnull(htmlSource);
		queueDiscoveredLinks(htmlSource, domain);

		Urls urlsCollection = new Urls();
		urlsCollection.setUrl(url);
		// htmlSource is known to be non-null here, so only pageConfig decides.
		if (pageConfig != null) {
			dao.singlCat(siteId, pageConfig, htmlSource, true, domain);
			urlsCollection.setState("1");// processed
			logger.debug(url + "===[" + (System.currentTimeMillis() - startMillis) + "]" + "===template[" + pageConfig.getTemplate() + "]===rule[" + pageConfig.getDb() + ":" + pageConfig.getRule() + "]");
		} else {
			logger.info(url + "==pageConfig is null htmlSource is not null==>");
			mongoDBUtil.insertFileRecord("警告", "无", "无pageConfig", "pageConfig is null", 0, "无", Md5.getMd5(url), url, "==>CollectionPage==>collect==》");
			urlsCollection.setState("0");// not processed
		}
		mongoDBUtil.urlsInsert(urlsCollection, title);
		return true;
	}

	/**
	 * Stores a placeholder page built from the "error.html" template for an
	 * external URL and writes the matching warning record.
	 */
	private void saveExternalLinkPage(String url) {
		Criteria criatira = new Criteria();
		criatira.andOperator(Criteria.where("fileName").is("error.html"));
		// NOTE(review): uses the static SiteController.siteId rather than the
		// siteId passed to collect(); the worker sets both to the same value.
		DbFile df = mongoDBUtil.findOneByCollectionName(SiteController.siteId + "_template", criatira, DbFile.class);
		if (df == null) {
			logger.error("error.html template not found, no placeholder page written for: " + url);
		} else {
			// NOTE(review): platform default charset on both conversions — confirm
			// it matches the encoding the template was stored with.
			Document doc = Jsoup.parse(new String(df.getFileByte()));
			doc.select("#error").attr("href", url);
			String mName = Util.interceptUrl(url);
			mongoDBUtil.insertFile(mName, doc.html().getBytes(), "/html/" + mName, "html", null);
		}
		mongoDBUtil.insertFileRecord("警告", "外链接", "pg无", "节点无", 0, "无", Md5.getMd5(url), url, "外链接CollectionPage==>collect");
	}

	/**
	 * Downloads the page: through {@link EsdDownLoadHtml} when a page
	 * configuration exists, otherwise with a plain jsoup GET.
	 *
	 * @return the parsed page, or null when the plain download fails or the
	 *         configured downloader returns null
	 */
	private Document downloadSource(String url, PageConfig pageConfig, String[] domain) {
		if (pageConfig != null) {
			EsdDownLoadHtml down = new EsdDownLoadHtml();
			pageConfig.setUrl(url);
			return down.downloadHtml(pageConfig, dao, domain, mongoDBUtil);
		}
		try {
			logger.info("pageConfig is null: " + url);
			Connection connection = Jsoup.connect(url);
			return connection.get();
		} catch (IOException e) {
			logger.error("download error: " + url, e);
			return null;
		}
	}

	/**
	 * Queues every usable link of the page for download. Links without a
	 * target, links rejected by {@code dao.filterSuffix} and unrecognised
	 * {@code javascript:} links are skipped.
	 */
	private void queueDiscoveredLinks(Document htmlSource, String[] domain) {
		Elements links = htmlSource.select("a[href],area[href],iframe[src]");
		for (Element link : links) {
			String href = extractLinkTarget(link);
			if (href.isEmpty()) {
				continue;
			}
			String filtered = dao.filterSuffix(href, domain);
			if (filtered == null) {
				continue;
			}
			// The target is resolved per link so that a skipped link can never
			// re-queue the URL of the link before it.
			String target = filtered.startsWith("javascript:") ? rewriteJavascriptLink(htmlSource, filtered) : filtered;
			if (target != null) {
				mongoDBUtil.downloadsInsert(target);
			}
		}
	}

	/**
	 * Returns the first non-empty value among the absolute href, the raw href
	 * and the absolute src of the element, or an empty string when all three
	 * are empty.
	 */
	private static String extractLinkTarget(Element link) {
		String target = link.attr("abs:href").trim();
		if (target.isEmpty()) {
			target = link.attr("href").trim();
		}
		if (target.isEmpty()) {
			target = link.attr("abs:src").trim();
		}
		return target;
	}

	/**
	 * Translates a known {@code javascript:} link into a real URL and replaces
	 * the href of the matching elements in the document with it.
	 *
	 * @return the URL to queue, or null when the script is not recognised or
	 *         the values it needs are missing
	 */
	private String rewriteJavascriptLink(Document doc, String script) {
		if (script.startsWith("javascript:showDetail333")) {
			String target = "http://spjc.bjmtg.gov.cn/hrapp/webanser/savebody.jsp";
			replaceHref(doc, script, target);
			return target;
		}
		if (script.startsWith("javascript:ckxq()")) {
			return rewriteCkxqLink(doc, script);
		}
		String resolved = null;
		for (String quote : ARGUMENT_QUOTES) {
			String[] parts = script.split(quote);
			if (parts.length <= 1) {
				continue;
			}
			String candidate = buildQuotedTarget(script, parts);
			if (candidate != null) {
				resolved = candidate;
				replaceHref(doc, script, candidate);
			}
		}
		return resolved;
	}

	/**
	 * Builds the URL for a {@code javascript:ckxq()} link from the "defname"
	 * field and the "operation" field of the form with id "selForm".
	 */
	private String rewriteCkxqLink(Document doc, String script) {
		Element defnameField = doc.getElementsByAttributeValue("name", "defname").first();
		Element form = doc.getElementById("selForm");
		Element operationField = form == null ? null : form.getElementsByAttributeValue("name", "operation").first();
		if (defnameField == null || operationField == null) {
			logger.warn("ckxq() link without defname/operation fields, skipped: " + script);
			return null;
		}
		String target = PROCEEDING_CTRL_URL + "?defname=" + encodeChinese(defnameField.val()) + "&operation=" + operationField.val();
		replaceHref(doc, script, target);
		return target;
	}

	/**
	 * Builds the URL for a script whose arguments are quoted.
	 *
	 * @param script
	 *            the {@code javascript:} link
	 * @param parts
	 *            the script split on the quote character, at least two entries
	 * @return the URL, or null when the script or its argument count is not
	 *         recognised
	 */
	private static String buildQuotedTarget(String script, String[] parts) {
		if (script.startsWith("javascript:showDetail666")) {
			return PROCEEDING_CTRL_URL + "?operation=view1&defname=" + encodeChinese(parts[1]);
		}
		if (script.startsWith("javascript:showDetail(")) {
			if (parts.length == 3) {
				return "http://spjc.bjmtg.gov.cn/hrapp/article/articleDetail.jsp?id=" + parts[1];
			}
			if (parts.length == 7) {
				String view = PROCEEDING_CTRL_URL + "?operation=view&deptId=" + parts[3] + "&id=" + parts[1];
				if (parts[5].equals("1")) {
					return view;
				}
				if (parts[5].equals("2")) {
					return view + "#isml";
				}
			}
			return null;
		}
		if (script.startsWith("javascript:showDetailpolicy(")) {
			return "http://spjc.bjmtg.gov.cn/publicaction?operation=showPolicyInfo&lngitem=" + parts[1];
		}
		if (script.startsWith("javascript:showList(")) {
			return PROCEEDING_CTRL_URL + "?operation=list&Station=tgfwdwbj&deptId=" + parts[1];
		}
		return null;
	}

	/** Sets the href of every element whose href equals {@code from} to {@code to}. */
	private static void replaceHref(Document doc, String from, String to) {
		doc.getElementsByAttributeValue("href", from).attr("href", to);
	}

	/**
	 * URL-encodes the value as GBK when {@code Util.isChineseCharacter} reports
	 * Chinese characters in it, then turns every "%26" back into "&amp;".
	 *
	 * @param url
	 *            the value to encode
	 * @return the encoded value, or the input with only the "%26" replacement
	 *         applied when no encoding took place
	 */
	public static String encodeChinese(String url) {
		String encoded = url;
		if (Util.isChineseCharacter(url)) {
			try {
				encoded = java.net.URLEncoder.encode(url, "GBK");
			} catch (UnsupportedEncodingException e) {
				logger.error("GBK encoding not supported, value left unencoded: " + url, e);
			}
		}
		// Literal replacement; no regular expression is needed here.
		return encoded.replace("%26", "&");
	}

	/**
	 * Releases all known resources: calls {@code mongoDBUtil.dropTable()} and
	 * requests a garbage collection.
	 */
	private void destroySource() {
		mongoDBUtil.dropTable();
		// NOTE(review): explicit GC request kept from the original; confirm it
		// is still wanted.
		System.gc();
	}

	/**
	 * Switches collection on or off; the worker loop stops before its next
	 * page once this is false.
	 */
	public void setCollectStatic(boolean collectStatic) {
		this.collectStatic = collectStatic;
	}

	/**
	 * Returns the trimmed text of the title element(s) of the document.
	 *
	 * @param doc
	 *            the page, may be null
	 * @return the title text, or null when the document is null
	 */
	public String docisnull(Document doc) {
		if (doc == null) {
			return null;
		}
		return doc.select("title").text().trim();
	}

}
